unpacking.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. """Utilities related to archives.
  2. """
  3. from __future__ import absolute_import
  4. import logging
  5. import os
  6. import shutil
  7. import stat
  8. import tarfile
  9. import zipfile
  10. from pip._internal.exceptions import InstallationError
  11. from pip._internal.utils.filetypes import (
  12. BZ2_EXTENSIONS,
  13. TAR_EXTENSIONS,
  14. XZ_EXTENSIONS,
  15. ZIP_EXTENSIONS,
  16. )
  17. from pip._internal.utils.misc import ensure_dir
  18. from pip._internal.utils.typing import MYPY_CHECK_RUNNING
  19. if MYPY_CHECK_RUNNING:
  20. from typing import Iterable, List, Optional, Text, Union
  21. from zipfile import ZipInfo
  22. logger = logging.getLogger(__name__)
  23. SUPPORTED_EXTENSIONS = ZIP_EXTENSIONS + TAR_EXTENSIONS
  24. try:
  25. import bz2 # noqa
  26. SUPPORTED_EXTENSIONS += BZ2_EXTENSIONS
  27. except ImportError:
  28. logger.debug('bz2 module is not available')
  29. try:
  30. # Only for Python 3.3+
  31. import lzma # noqa
  32. SUPPORTED_EXTENSIONS += XZ_EXTENSIONS
  33. except ImportError:
  34. logger.debug('lzma module is not available')
  35. def current_umask():
  36. # type: () -> int
  37. """Get the current umask which involves having to set it temporarily."""
  38. mask = os.umask(0)
  39. os.umask(mask)
  40. return mask
  41. def split_leading_dir(path):
  42. # type: (Union[str, Text]) -> List[Union[str, Text]]
  43. path = path.lstrip('/').lstrip('\\')
  44. if (
  45. '/' in path and (
  46. ('\\' in path and path.find('/') < path.find('\\')) or
  47. '\\' not in path
  48. )
  49. ):
  50. return path.split('/', 1)
  51. elif '\\' in path:
  52. return path.split('\\', 1)
  53. else:
  54. return [path, '']
  55. def has_leading_dir(paths):
  56. # type: (Iterable[Union[str, Text]]) -> bool
  57. """Returns true if all the paths have the same leading path name
  58. (i.e., everything is in one subdirectory in an archive)"""
  59. common_prefix = None
  60. for path in paths:
  61. prefix, rest = split_leading_dir(path)
  62. if not prefix:
  63. return False
  64. elif common_prefix is None:
  65. common_prefix = prefix
  66. elif prefix != common_prefix:
  67. return False
  68. return True
  69. def is_within_directory(directory, target):
  70. # type: ((Union[str, Text]), (Union[str, Text])) -> bool
  71. """
  72. Return true if the absolute path of target is within the directory
  73. """
  74. abs_directory = os.path.abspath(directory)
  75. abs_target = os.path.abspath(target)
  76. prefix = os.path.commonprefix([abs_directory, abs_target])
  77. return prefix == abs_directory
  78. def set_extracted_file_to_default_mode_plus_executable(path):
  79. # type: (Union[str, Text]) -> None
  80. """
  81. Make file present at path have execute for user/group/world
  82. (chmod +x) is no-op on windows per python docs
  83. """
  84. os.chmod(path, (0o777 & ~current_umask() | 0o111))
  85. def zip_item_is_executable(info):
  86. # type: (ZipInfo) -> bool
  87. mode = info.external_attr >> 16
  88. # if mode and regular file and any execute permissions for
  89. # user/group/world?
  90. return bool(mode and stat.S_ISREG(mode) and mode & 0o111)
  91. def unzip_file(filename, location, flatten=True):
  92. # type: (str, str, bool) -> None
  93. """
  94. Unzip the file (with path `filename`) to the destination `location`. All
  95. files are written based on system defaults and umask (i.e. permissions are
  96. not preserved), except that regular file members with any execute
  97. permissions (user, group, or world) have "chmod +x" applied after being
  98. written. Note that for windows, any execute changes using os.chmod are
  99. no-ops per the python docs.
  100. """
  101. ensure_dir(location)
  102. zipfp = open(filename, 'rb')
  103. try:
  104. zip = zipfile.ZipFile(zipfp, allowZip64=True)
  105. leading = has_leading_dir(zip.namelist()) and flatten
  106. for info in zip.infolist():
  107. name = info.filename
  108. fn = name
  109. if leading:
  110. fn = split_leading_dir(name)[1]
  111. fn = os.path.join(location, fn)
  112. dir = os.path.dirname(fn)
  113. if not is_within_directory(location, fn):
  114. message = (
  115. 'The zip file ({}) has a file ({}) trying to install '
  116. 'outside target directory ({})'
  117. )
  118. raise InstallationError(message.format(filename, fn, location))
  119. if fn.endswith('/') or fn.endswith('\\'):
  120. # A directory
  121. ensure_dir(fn)
  122. else:
  123. ensure_dir(dir)
  124. # Don't use read() to avoid allocating an arbitrarily large
  125. # chunk of memory for the file's content
  126. fp = zip.open(name)
  127. try:
  128. with open(fn, 'wb') as destfp:
  129. shutil.copyfileobj(fp, destfp)
  130. finally:
  131. fp.close()
  132. if zip_item_is_executable(info):
  133. set_extracted_file_to_default_mode_plus_executable(fn)
  134. finally:
  135. zipfp.close()
  136. def untar_file(filename, location):
  137. # type: (str, str) -> None
  138. """
  139. Untar the file (with path `filename`) to the destination `location`.
  140. All files are written based on system defaults and umask (i.e. permissions
  141. are not preserved), except that regular file members with any execute
  142. permissions (user, group, or world) have "chmod +x" applied after being
  143. written. Note that for windows, any execute changes using os.chmod are
  144. no-ops per the python docs.
  145. """
  146. ensure_dir(location)
  147. if filename.lower().endswith('.gz') or filename.lower().endswith('.tgz'):
  148. mode = 'r:gz'
  149. elif filename.lower().endswith(BZ2_EXTENSIONS):
  150. mode = 'r:bz2'
  151. elif filename.lower().endswith(XZ_EXTENSIONS):
  152. mode = 'r:xz'
  153. elif filename.lower().endswith('.tar'):
  154. mode = 'r'
  155. else:
  156. logger.warning(
  157. 'Cannot determine compression type for file %s', filename,
  158. )
  159. mode = 'r:*'
  160. tar = tarfile.open(filename, mode)
  161. try:
  162. leading = has_leading_dir([
  163. member.name for member in tar.getmembers()
  164. ])
  165. for member in tar.getmembers():
  166. fn = member.name
  167. if leading:
  168. # https://github.com/python/mypy/issues/1174
  169. fn = split_leading_dir(fn)[1] # type: ignore
  170. path = os.path.join(location, fn)
  171. if not is_within_directory(location, path):
  172. message = (
  173. 'The tar file ({}) has a file ({}) trying to install '
  174. 'outside target directory ({})'
  175. )
  176. raise InstallationError(
  177. message.format(filename, path, location)
  178. )
  179. if member.isdir():
  180. ensure_dir(path)
  181. elif member.issym():
  182. try:
  183. # https://github.com/python/typeshed/issues/2673
  184. tar._extract_member(member, path) # type: ignore
  185. except Exception as exc:
  186. # Some corrupt tar files seem to produce this
  187. # (specifically bad symlinks)
  188. logger.warning(
  189. 'In the tar file %s the member %s is invalid: %s',
  190. filename, member.name, exc,
  191. )
  192. continue
  193. else:
  194. try:
  195. fp = tar.extractfile(member)
  196. except (KeyError, AttributeError) as exc:
  197. # Some corrupt tar files seem to produce this
  198. # (specifically bad symlinks)
  199. logger.warning(
  200. 'In the tar file %s the member %s is invalid: %s',
  201. filename, member.name, exc,
  202. )
  203. continue
  204. ensure_dir(os.path.dirname(path))
  205. assert fp is not None
  206. with open(path, 'wb') as destfp:
  207. shutil.copyfileobj(fp, destfp)
  208. fp.close()
  209. # Update the timestamp (useful for cython compiled files)
  210. # https://github.com/python/typeshed/issues/2673
  211. tar.utime(member, path) # type: ignore
  212. # member have any execute permissions for user/group/world?
  213. if member.mode & 0o111:
  214. set_extracted_file_to_default_mode_plus_executable(path)
  215. finally:
  216. tar.close()
  217. def unpack_file(
  218. filename, # type: str
  219. location, # type: str
  220. content_type=None, # type: Optional[str]
  221. ):
  222. # type: (...) -> None
  223. filename = os.path.realpath(filename)
  224. if (
  225. content_type == 'application/zip' or
  226. filename.lower().endswith(ZIP_EXTENSIONS) or
  227. zipfile.is_zipfile(filename)
  228. ):
  229. unzip_file(
  230. filename,
  231. location,
  232. flatten=not filename.endswith('.whl')
  233. )
  234. elif (
  235. content_type == 'application/x-gzip' or
  236. tarfile.is_tarfile(filename) or
  237. filename.lower().endswith(
  238. TAR_EXTENSIONS + BZ2_EXTENSIONS + XZ_EXTENSIONS
  239. )
  240. ):
  241. untar_file(filename, location)
  242. else:
  243. # FIXME: handle?
  244. # FIXME: magic signatures?
  245. logger.critical(
  246. 'Cannot unpack file %s (downloaded from %s, content-type: %s); '
  247. 'cannot detect archive format',
  248. filename, location, content_type,
  249. )
  250. raise InstallationError(
  251. 'Cannot determine archive format of {}'.format(location)
  252. )